Â
 Â
# PLOTTING GRAPH of 3 Year Chicago Public Transportation Trend + Weather
# WINTER
# Shading Winters From 21st December to 30th March
# Reference: https://stackoverflow.com/questions/18419628/creating-custom-legends-in-ggplot2
df<-data.frame(xmin=as.Date(c('2015-01-01','2015-12-21','2016-12-21', '2017-12-21' )),
xmax=as.Date(c('2015-03-20','2016-03-30', '2017-03-30', '2017-12-31' )),
ymin=c(-Inf,-Inf),
ymax=c(Inf,Inf),
winters=c("Winters"))
total_demand_by_day %>%
ggplot(aes(x = date, y = riders/1000)) +
geom_smooth(aes(color = trip_identity), se = FALSE, na.rm = TRUE) +
geom_rect(data=df,aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax, fill = 'winters'),
alpha=0.15,inherit.aes=FALSE, fill = winter_color) +
scale_color_manual(values=c(bus_color,divvy_color, taxi_color, train_color)) +
scale_x_date(date_labels = "%d %b %Y") +
theme_bw() +
my_theme +
theme(legend.position="none") +
scale_y_continuous(labels = scales::comma) +
geom_vline(xintercept = as.Date(c("2015-01-01", "2016-01-01", "2017-01-01", "2018-01-01")),
linetype = 4) +
annotate("label", x = as.Date("2015-06-01"), y = 500,
label = 'TRAIN', color = train_color, fontface =2, family="Courier", size = 10) +
annotate("label", x = as.Date("2016-06-01"), y = 670,
label = 'BUS', color = bus_color, fontface =2, family="Courier", size = 10) +
annotate("label", x = as.Date("2016-06-01"), y = 100,
label = 'TAXI', color = taxi_color, fontface =2, family="Courier", size = 10) +
annotate("label", x = as.Date("2015-05-01"), y = 40,
label = 'DIVVY', color = divvy_color, fontface =2, family="Courier", size = 10) +
annotate("label", x = as.Date("2016-03-21"), y = 320,
label = 'WINTER', color = winter_color, fontface =2, family="Courier", size = 10) +
labs(title ="Demand for Public Transport & Divvy In Chicago Is Lowest In Winter",
subtitle = "Taxi Usage Less Elastic To Seasonal Changes",
x = "", y = "Passengers Per Day (Thousands)",
caption = "CTA Ridership Data\n 2015-2017\n *Taxi Data till July 2017") +
annotate("text", x = as.Date("2017-08-10"), y = 30,
label = '*', fontface =2, family="Courier")
Between 2015 - 2017, there are two very clear trends: 1) For Trains & Divvy bikes, there is a prominent seasonal variation. In the summer, demand peaks, while in the winter, demand is at its lowest. 2) Demand for Buses & Taxis has decreased throughout this time period. There also doesn’t seem to be a seasonal component accompanying this decline. These patterns can impact the allocation and deployment of transportation services by the CTA across seasons. One possible explanation for the decline of demand for buses and taxis might be an increased reliance on ride-share services such as Uber, Lyft etc.
   Â
 Â
temp <-
public_transport %>%
group_by(dayofweek, trip_identity) %>%
summarise(count = (sum(rides)/(3*52.19))/1000)
bus_point <-
temp %>%
filter(trip_identity == "bus")
train_point <-
temp %>%
filter(trip_identity == "train")
ggplot() +
geom_point(data = bus_point, aes(x = dayofweek, y = count), colour = bus_color, size = 5, stroke = 1) +
geom_point(data = train_point, aes(x = dayofweek, y = count), colour = train_color, size = 5, stroke = 1) +
geom_line(data = temp, mapping = aes(x = dayofweek, y=count), alpha = 0.3) +
scale_y_continuous(labels = scales::comma) +
coord_cartesian( ylim = c(200, 900)) +
labs(title ="Usage of Public Transport in Chicago is Highest on Weekdays",
subtitle = "Weekends see a significant drop in riders",
x = "Day of Week", y = "Average Daily Passengers (Thousands)",
caption = "CTA Ridership Data\n 2015-2017") +
theme_bw() +
theme(plot.title = element_text(hjust = -0, size = 12, face = "bold"),
plot.subtitle = element_text(size = 10, face = "bold"),
legend.title=element_blank(),
legend.key = element_rect(fill = "white")) +
coord_flip() +
my_theme +
annotate("label", x = 5.5, y = 370,
label = 'TRAIN', color = train_color, fontface =2, family="Courier", size = 15) +
annotate("label", x = 5.5, y = 510,
label = 'BUS', color = bus_color, fontface =2, family="Courier", size = 15) +
theme(legend.position="none",
axis.title.y=element_blank(),
axis.ticks.y=element_blank()
)
For buses and trains, comparisons within a week indicate that highest usage is during weekdays, possibly due to work or school transits. Deployments of public transportation are also more spread out on weekends by the CTA to ensure more efficient use.
 Â
taxi_num_days <- n_distinct(date(taxi_start_times_date$date))
divvy_num_days <- n_distinct(date(divvy_trips$date))
taxi_start_times_date$hour <- hour(taxi_start_times_date$date)
taxi_start_times_date$weekday <- lubridate::wday(taxi_start_times_date$date, label = TRUE)
taxi_by_weekday <-
taxi_start_times_date %>%
group_by(weekday) %>%
summarise(count = n())
divvy_by_weekday <-
divvy_trips %>%
group_by(weekday = wday(divvy_trips$date)) %>%
summarize(count = n())
taxi_by_weekday$weekday <- factor(taxi_by_weekday$weekday, levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))
ggplot() +
geom_point(data = taxi_by_weekday, aes(x = weekday, y = count/taxi_num_days),
colour = taxi_color, size = 5, stroke = 1) +
geom_point(data = divvy_by_weekday, aes(x = weekday, y = count/divvy_num_days),
colour = divvy_color, size = 5, stroke = 1) +
scale_y_continuous(labels = scales::comma) +
#coord_cartesian( ylim = c(200, 900)) +
labs(title ="Taxi Demand Peaks Throughout Week, Then Falls On Weekend",
subtitle = "Divvy Bike Demand Remains Constant",
x = "Day of Week", y = "Average Daily Trips",
caption = "Chicago Taxi & Divvy\n Ridership Data 2015-2017") +
theme_bw() +
theme(plot.title = element_text(hjust = -0, size = 12, face = "bold"),
plot.subtitle = element_text(size = 10, face = "bold"),
legend.title=element_blank(),
legend.key = element_rect(fill = "white")) +
#coord_flip() +
my_theme +
annotate("label", x = 3.5, y = 6800, size = 15,
label = 'TAXI', color = taxi_color, fontface =2, family="Courier") +
annotate("label", x = 3, y = 2710, size = 15,
label = 'DIVVY', color = divvy_color, fontface =2, family="Courier") +
theme(legend.position="none",
axis.title.y=element_blank(),
axis.ticks.y=element_blank()
)
The first interesting observation is that demand for divvy doesn’t vary much throughout the week, even though one might expect weekends to have higher demand, especially in downtown areas. The second interesting observation is that demand for taxis keeps increasing between Monday - Thursday even though there isn’t any significant different in activities/behaviors of individuals between those days. Some of the increase in demand for Friday can be attributed to Friday-Night outgoing behavior. While weekends see a decline, Sunday is much lower than Saturday, which is also surprising.
 Â
taxi_start_times_solo <- as.data.frame(taxi_start_times)
taxi_light <- "#ffffb2"
taxi_dark <- "#cc4c02"
taxi_start_times_solo$hour <- hour(taxi_start_times_solo$taxi_start_times)
taxi_start_times_solo$weekday <- lubridate::wday(taxi_start_times_solo$taxi_start_times, label = TRUE)
taxi_by_hour <-
taxi_start_times_solo %>%
group_by(weekday, hour) %>%
summarize(count = n())
taxi_by_hour$weekday <- factor(taxi_by_hour$weekday, levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))
taxi_by_hour$hour <- as.factor (taxi_by_hour$hour)
taxi_by_hour$hour <- factor(taxi_by_hour$hour,
levels = rev(c("0", "1","2", "3","4", "5","6", "7","8", "9","10",
"11","12", "13","14", "15","16",
"17","18", "19",
"20", "21","22", "23")))
ggplot(data = taxi_by_hour, aes(x = weekday, y = hour, fill = count/taxi_num_days)) +
geom_tile() +
scale_fill_gradient(low = taxi_light, high = taxi_dark,name = "Taxi Trips", labels = comma) +
theme_bw() +
my_theme +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y = element_text(vjust=-1.3),
axis.text.x = element_text(size = 15, face = "bold")) +
theme (legend.key = element_rect(fill = "black"),
legend.spacing.x = unit(0.2, 'cm'),
legend.text = element_text(family = "Courier", size = 8, face = "bold"),
legend.title = element_text(family = "Courier", size = 8, face = "bold"),
legend.background = element_rect(fill="white", size=1, linetype="solid",colour ="#969696")) +
scale_x_discrete (position = "top", expand = c(0,0)) +
scale_y_discrete(breaks = c(0,3,6,9,12,15,18,21),
labels = c("Midnight","3 am","6 am","9 am", "Noon", "3 pm", "6 pm", "9 pm")) +
labs (title ="Highest Weekday Taxi Traffic Between 6-8 pm, Later on Weekends",
subtitle = "Weekend Rush Drives Taxis Late Into The Night",
x = "",
y = "",
caption = "Chicago Taxi Trips Data\n 2015-2017")
During weekdays, high demand is during the day, peaking during evening rush hour, with Friday evening rush hour being the busiest. However, unlike on weekdays, on weekdays, demand stays high at night, and extends to the early hours of the next day on Saturday and Sunday. There is a strong arugment for varying taxi per mile rates by time of day, especially for weekends, to allow taxi drivers to be able to generate more revenue for servicing at late, often inconvenient hours, of the night.
 Â
divvy_by_duration <-
divvy_trips %>%
group_by(day_of_week, time_of_day) %>%
summarize(duration = median(TRIP.DURATION))
divvy_by_duration$day_of_week <- factor(divvy_by_duration$day_of_week, levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))
divvy_by_duration$time_of_day <- factor(divvy_by_duration$time_of_day,
levels = rev(c("0", "1","2", "3","4", "5","6", "7","8", "9","10",
"11","12", "13","14", "15","16",
"17","18", "19",
"20", "21","22", "23")))
ggplot(data = divvy_by_duration, aes(x = day_of_week, y = time_of_day, fill = duration)) +
geom_tile()+
scale_fill_gradient(low = "#bdd7e7", high = "#2171b5" ,
name = "Median Trip\nDuration\n(In Seconds)", labels = comma) +
theme_bw() +
my_theme +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y = element_text(vjust=-1.3),
axis.text.x = element_text(size = 15, face = "bold")) +
theme (legend.key = element_rect(fill = "black"),
legend.spacing.x = unit(0.2, 'cm'),
legend.text = element_text(family = "Courier", size = 8, face = "bold"),
legend.title = element_text(family = "Courier", size = 8, face = "bold"),
legend.background = element_rect(fill="white", size=1, linetype="solid",colour ="#969696")) +
scale_x_discrete (position = "top", expand = c(0,0)) +
scale_y_discrete(breaks = c(0,3,6,9,12,15,18,21),
labels = c("Midnight","3 am","6 am","9 am", "Noon", "3 pm", "6 pm", "9 pm")) +
labs (title ="Longest Divvy Trips Occur on Weekends.",
subtitle = "Weekdays See Shorter Rides, Especially in Mornings & Evenings.",
x = "",
y = "",
caption = "Chicago Divvy Data\n 2015-2017")
Possibly due to increased tourist and visitor usage in Downtown/River area on weekends, the median trip duration is quite large compared to weekday median trip durations. Late night trips also seem to be of large durations, which may most likely be attributed to a very low number of riders in the first place, who may be interested in travelling for long durations at that time of day. Overall, this indicates that there are two distinct categories of users of divvy bikes: people who use divvy bikes to travel from/to work (or a bus/train stop) on weekdays, and people who use divvy bikes on weekends, probably for tourism.
 Â
divvy_by_gender <- divvy_trips %>%
filter(GENDER != '') %>%
group_by(hour_of_day = hour(date), GENDER) %>%
summarize(riders_by_hour = trunc(n() / (365*3)))
ggplot(data = divvy_by_gender) +
geom_col(data = subset(divvy_by_gender, GENDER == "Male"),
aes(x = hour_of_day, y = riders_by_hour, fill = GENDER), fill = divvy_male_color) +
geom_col(data = subset(divvy_by_gender, GENDER == "Female"),
aes(x = hour_of_day, y = -riders_by_hour, fill = GENDER), fill = divvy_female_color) +
scale_fill_brewer(palette = "Set1") +
theme_bw() +
coord_flip() +
geom_hline(yintercept=0, colour="white", lwd=1) + my_theme +
scale_y_continuous( labels = abs) +
annotate("label", x = 23.5, y = -140,
label = 'FEMALE RIDERS', color = divvy_female_color, fontface =2, family="Courier") +
annotate("label", x = 23.50, y = 250,
label = 'MALE RIDERS', color = divvy_male_color, fontface =2, family="Courier") +
labs(title ="Highest Demand For Divvy During Peak Morning & Evening Hours",
subtitle = "Significantly Fewer Female Riders, Especially At Night",
x = "", y = "Passengers Per Day",
caption = "Divvy Ridership Data\n 2015-2017") +
scale_x_continuous(sec.axis = dup_axis(), breaks = c(0,6,12,17,22),
labels = c("Midnight", "6 am", "Noon", "5 pm", "10 pm" )) +
theme(axis.title.x = element_text(hjust = 0.2))
#remove(divvy_by_gender)
Demand for divvy bikes peaks at morning and evening rush hours. There is a noticably higher usage of divvy bikes by men compared to women at all hours of the day. It’s also interesting to note that there are almost no women riding bikes between 1 am and 4 am.
   Â
 Â
# reference: https://ibecav.github.io/slopegraph/
p <- public_transport %>%
group_by(year, month, trip_identity) %>%
summarise(riders = sum(rides)) %>%
filter (trip_identity == "bus")
green_palette <- c("#467563", "#78c679", "#78c679", "#238443","#467563",
"#c2e699", "#78c679", "#238443","#78c679", "#78c679", "#78c679", "#238443")
warmer_months <- "#fd8d3c"
cooler_months <- "#005a32"
alternative <- c(cooler_months, cooler_months, warmer_months, warmer_months,
warmer_months, warmer_months, warmer_months,
warmer_months, warmer_months, warmer_months, cooler_months, cooler_months)
ggplot(p, aes(x = year, y = riders, group = month)) +
scale_x_discrete(labels = c('Rides Taken on Snow Days \n in Winter',
'Rides Taken on Non-Snow Days \n in Winter', "wow")) +
geom_line(aes(color = month), alpha = 1, size = 1, linetype = 1) +
geom_point(aes(color = month, alpha = 1), size = 1.5) +
scale_colour_manual(values=alternative) +
geom_label_repel(data = p %>% filter(year == "2015"),
aes(label = paste0(month), color = month, family = "Courier", fontface = 2) ,
hjust = "left",
fontface = 7,
size = 3,
nudge_x = -.45,
direction = "y") +
geom_label_repel(data = p %>% filter(year == "2017"),
aes(label = paste0(month), color = month, family = "Courier", fontface = 2) ,
hjust = "right",
fontface = 7,
size = 2.5,
nudge_x = .4,
direction = "y") +
#scale_x_discrete(position = "bottom") +
theme_bw() +
theme(legend.position = "none") +
theme(panel.border = element_blank()) +
#theme(axis.title.x = element_blank()) +
theme(axis.text.y = element_blank()) +
theme(axis.ticks = element_blank()) +
my_theme +
labs( subtitle = "July Has Low Bus Usage Compared To Other Warm Months",
title = "Warm Months Experience More Bus Usage Than Cool Months",
caption = "CTA Transit Data\n 2015 - 2017",
x = "2015 2016 2017",
y = "Total Bus Riders") +
theme(plot.title = element_text(hjust = 0, size = 24, face = "bold"),
plot.subtitle = element_text(hjust = 0, size = 20, face = "bold")) +
scale_y_continuous(labels = scales::comma)
The demand for buses has been falling between 2015 and 2017 as established earlier. However, further exploration reveals that this decline isn’t driver by any particular set of months or seasonality, but by an overall decline across each month. Demand in July is dissimilar to other warmer months, potentially due to school closures in July which might dampen demand. December, which also experiences some school closure, is similarly behind other cooler months.
 Â
winter_dates <-
weather %>%
filter((date >= "2015-01-01" & date <= "2015-03-20") |
(date >= "2015-12-21" & date <= "2016-03-20") |
(date >= "2016-12-21" & date <= "2017-03-20") |
(date >= "2017-12-21" & date <= "2017-12-31"))
snow_days <-
winter_dates %>%
filter (SNOW > 0)
no_snow_days <-
winter_dates %>%
filter (SNOW == 0)
train_snow_days <-
merge(snow_days, train_trips, by = "date") %>%
group_by(date) %>%
summarize (riders = sum(rides)) %>%
mutate (type = "TRAIN", day_type = "SNOW")
train_no_snow <-
merge(no_snow_days, train_trips, by = "date") %>%
group_by(date) %>%
summarize (riders = sum(rides)) %>%
mutate (type = "TRAIN", day_type = "NO SNOW")
bus_snow_days <-
merge(snow_days, bus_trips, by = "date") %>%
group_by(date) %>%
summarize (riders = sum(rides)) %>%
mutate (type = "BUS", day_type = "SNOW")
bus_no_snow <-
merge(no_snow_days, bus_trips, by = "date") %>%
group_by(date) %>%
summarize (riders = sum(rides)) %>%
mutate (type = "BUS", day_type = "NO SNOW")
snow_comparison <- do.call("rbind", list(train_snow_days, train_no_snow, bus_snow_days,bus_no_snow ))
snow_comparison$day_type <- factor(snow_comparison$day_type, levels = c("SNOW", "NO SNOW"))
remove (train_snow_days, train_no_snow, bus_snow_days,bus_no_snow, snow_days, no_snow_days)
ggplot(snow_comparison) +
geom_boxplot(aes(x = day_type, y = riders/1000, fill = type)) +
scale_x_discrete(labels = c('Rides Taken on Snow Days \n in Winter',
'Rides Taken on Non-Snow Days \n in Winter')) +
labs(title ="Snow Does NOT Have Much Effect on Ridership in Winter",
subtitle = "Cold Non-Snowy Days Seem As Unappealing To Riders",
x = "",
y = "Riders (Thousands)",
caption = "NCEI & CTA Rider Data\n 2015-2017") +
theme_bw() +
my_theme +
scale_fill_manual(values=c(bus_color, train_color)) +
annotate("label", x = 0.8, y = 200,
label = 'BUS', color = bus_color, fontface =2, family="Courier") +
annotate("label", x = 1.8, y = 120,
label = 'BUS', color = bus_color, fontface =2, family="Courier") +
annotate("label", x = 1.2, y = 750,
label = 'TRAIN', color = train_color, fontface =2, family="Courier") +
annotate("label", x = 2.2, y = 750,
label = 'TRAIN', color = train_color, fontface =2, family="Courier") +
theme(legend.position="none")
Comparing within cold days, it can be seen that consumers are indifferent between the presence & absence of snow. As such, snow doesn’t necessarily present a physical barrier that would make people want to take fewer buses or trains (on average), indicating a more resilient and inelastic demand for public transportation.
 Â
#1 week before week of lol
#2 week == effect of week of lol
#1 week after effect of week of lol
downtown_effected_buses <- c("3", "4", "12", "6", "10", "J14", "130", "146",
"126", "147", "29", "62", "36", "151", "60", "22")
#2015
lol_2015 <-
bus_trips %>%
filter(date >= "2015-07-20" & date <= "2015-08-16") %>%
select(route, date, rides)
lol_2015$days_to_lolla <- lol_2015$date - date("2015-07-31")
#2016
lol_2016 <-
bus_trips %>%
filter(date >= "2016-07-18" & date <= "2016-08-14") %>%
select(route, date, rides)
lol_2016$days_to_lolla <- lol_2016$date - date("2016-07-28")
#2017
lol_2017 <-
bus_trips %>%
filter(date >= "2017-07-24" & date <= "2017-08-20") %>%
select(route, date, rides)
lol_2017$days_to_lolla <- lol_2017$date - date("2017-08-03")
lol_total <- rbind(lol_2015, lol_2016, lol_2017)
lol_effected <-
lol_total %>%
filter(route %in% downtown_effected_buses) %>%
group_by(days_to_lolla) %>%
#summarise(riders = mean(rides))
summarise(riders = sum(rides))
lol_effected = lol_effected[-1,]
lol_effected <- lol_effected[-nrow(lol_effected),]
lol_uneffected <-
lol_total %>%
filter(!(route %in% downtown_effected_buses)) %>%
group_by(days_to_lolla) %>%
#summarise(riders = mean(rides))
summarise(riders = sum(rides))
lol_uneffected = lol_uneffected[-1,]
lol_uneffected <- lol_uneffected[-nrow(lol_uneffected),]
ymin=c(-Inf,-Inf)
ymax=c(Inf,Inf)
bus_downtown <- "#66c2a4"
bus_other <- "#2ca25f"
lolla_color <- "#f03b20"
img <- readPNG(here::here("lolla_image.png"))
g <- rasterGrob(img, interpolate=TRUE)
ggplot() +
stat_smooth(data = lol_effected, aes(x = days_to_lolla, y = riders/(3*1000)),
colour = bus_downtown, method = "lm",
formula = y ~ poly(x, 15), se = FALSE) +
stat_smooth(data = lol_uneffected, aes(x = days_to_lolla, y = riders/(3*1000)),
colour = bus_other, method = "lm",
formula = y ~ poly(x, 15), se = FALSE) +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = c(-14:20)) +
theme_bw() +
my_theme +
annotation_custom(g, xmin=0, xmax=4, ymin=700, ymax=800) +
scale_x_continuous(breaks = c(-10,-3,0,4,11, 14),
labels = c("10 days To\nLollapalooza","Road\nBlocks\nStart",
"Start Of\nLollapalooza\n",
"End of\nLollapalooza", "Road\nBlocks\nEnd",
"10 Days After\nEnd Of\nLollapalooza")) +
theme(axis.title.x=element_blank()) +
annotate("label", x = 13, y = 680,
label = 'Route-Unaffected\nBuses', color = bus_other,
fontface =2, family="Courier") +
annotate("label", x = 12, y = 220,
label = 'Route-Affected Buses', color = bus_downtown,
fontface =2, family="Courier") +
annotate("text", x = 1, y = 320, label = "Higher Trough\nThan Usual",
colour = lolla_color, face = "bold", size = 5, face = "bold", family="Courier" ) +
annotate("segment", xend = 2, x = 1, yend = 400, y = 350,
color=lolla_color,
arrow = arrow(length = unit(0.35, "cm"))) +
labs(title ="Regular Users Switch to Route-Unaffected Buses During Lollapalooza",
subtitle = "Users Revert to Regular Demand Patterns After Road Closures Removed",
x = "", y = "Bus Users (Thousands)",
caption = "CTA Bus Data\n 2015-2017")
We also see how inelastic this change in demand for public transportation is for events such as Lollapalooza. While there is some marginal shift from buses (potentially to trains - to avoid road traffic), for the most part, the buses with changed routes see little change in demand during the road-blockage period. Some increase in demand on non-effected buses indicates there is some shift, but it reverts to original patterns as soon as road blocks are removed.
   Â
 Â
divvy_trips$year <- year(divvy_trips$date)
divvy_trips$month <- month(divvy_trips$date)
divvy_trips <-
divvy_trips %>%
filter (year == 2017, month == 7) %>%
drop_na(FROM.LONGITUDE,FROM.LATITUDE)
divvy_from_sf <- st_as_sf(divvy_trips,
coords=c("FROM.LONGITUDE","FROM.LATITUDE"),
crs=4326,
agr="constant")
divvy_from_sf <- st_transform (divvy_from_sf, 32616)
joined_data <- st_join(divvy_from_sf, areas)
grouped_by_community <-
joined_data %>%
group_by(community) %>%
summarize (bike_starts = n())
area_groups <- st_join(areas, grouped_by_community)
ggplot() +
geom_sf(data = area_groups, aes(fill = bike_starts), colour = "black", show.legend = "line") +
#geom_sf_text(data = area_groups, aes(label = community.x),
# size = 0.65, colour = "white", family="Courier", check_overlap = TRUE) +
geom_sf(data = trains, colour = "red") +
theme_bw() +
my_theme_maps +
theme(panel.grid.major=element_line(colour="transparent")) +
labs (subtitle ="Highest Divvy Usage In Downtown & North Chicago",
title = "South-West Areas With No Trains Also Have No Divvys",
x = "",
y = "",
caption = "Chicago Divvy Trips July 2017\n*Red Lines Indicate Train Routes",
fill='Divvy Rides')
There is a clear case of the ‘rich getting richer’ when it comes to transportation access in Chicago. Areas with ample supply of train lines also have a large amount of divvy bikes, whereas areas, especially on the South/South-West side, albeit with lower population, have very little access to train routes, and no divvy bikes either. Areas in downtown have large divvy usage, and further exploration can be done to assess demand in that area by day of week to glean tourist traffic.
 Â
taxi_2015 <- read.csv (here::here("data", "2015_Taxi_Trips1.csv"))
taxi_2015 <-
taxi_2015 %>%
drop_na(Pickup.Centroid.Latitude, Pickup.Centroid.Longitude, Pickup.Centroid.Location,
Dropoff.Centroid.Latitude, Dropoff.Centroid.Longitude, Dropoff.Centroid..Location,
Trip.Total)
taxi_2015$Pickup.Centroid.Latitude = as.numeric(as.character(taxi_2015$Pickup.Centroid.Latitude))
taxi_2015$Dropoff.Centroid.Latitude = as.numeric(as.character(taxi_2015$Dropoff.Centroid.Latitude))
taxi_2015$Dropoff.Centroid.Longitude = as.numeric(taxi_2015$Dropoff.Centroid.Longitude)
taxi_2015 <-
taxi_2015 %>%
drop_na(Pickup.Centroid.Latitude) %>%
mutate_if(is.numeric, round, digits=4)
taxi_2015_grouped <-
taxi_2015 %>%
group_by(Pickup.Centroid.Longitude, Pickup.Centroid.Latitude) %>%
summarize(median_cost = median(Trip.Total), num_trips = n()) %>%
filter (median_cost < 100, num_trips > 10)
taxi_starts <- st_as_sf(taxi_2015_grouped, coords=c("Pickup.Centroid.Longitude","Pickup.Centroid.Latitude"),
agr="constant", crs=4326)
ggplot(data = taxi_starts) +
geom_sf(data = areas, colour = "white", fill = "black") +
geom_sf(aes(size = num_trips, colour = median_cost), show.legend = 'point') +
theme_bw() +
my_theme_maps +
theme(panel.grid.major=element_line(colour="transparent")) +
scale_colour_gradient(low = "#E8E013", high = "#FF1570",
name = "Median Trip Cost\n(In USD)", labels = comma) +
annotate("segment", x = -87.91, xend = -87.92, y = 41.97, yend = 41.93,
color="orange", linetype="twodash") +
annotate("text", x = -87.91, y = 41.92, label = "O'Hare \nAirport",
colour = "orange", face = "bold", size = 3, face = "bold", family="Courier" ) +
annotate("segment", x = -87.765, xend = -87.83, y = 41.785, yend = 41.82,
color="orange", linetype="twodash") +
annotate("text", x = -87.83, y = 41.83, label = "Midway \nAirport",
colour = "orange", face = "bold", size = 3, face = "bold", family="Courier" ) +
annotate("segment", x = -87.615, xend = -87.58, y = 41.895, yend = 41.93,
color="orange", linetype="twodash") +
annotate("text", x = -87.57, y = 41.94, label = "Downtown \nChicago",
colour = "orange", face = "bold", size = 3, face = "bold", family="Courier" ) +
#theme(legend.position = c(0.13,0.24)) +
scale_size_continuous(name = 'Number of Rides \n(From Location)', labels = comma) +
labs (title ="Airports & South Side Generate Highest Median Revenue",
subtitle = "But Most Taxi Traffic Is Concentrated in Downtown & North Side",
x = "",
y = "",
caption = "Chicago Taxi Trips\n2015 Data")
Naturally, trips from the airports (Midway & O’Hare) generate the highest median trip cost due to their large distances, and airport fee related costs. However, it is also interesting to see that a large number of other high cost trips happen in areas not serviced by train routes, such as in the far South/South-West sides of Chicago. Downtown, apart from having large Divvy & Bus/Train stops, also has high taxi traffic.
 Â
taxi_2015_grouped_distance <-
taxi_2015 %>%
group_by(Pickup.Centroid.Longitude, Pickup.Centroid.Latitude) %>%
summarize(median_trip_distance = median(Trip.Miles), num_trips = n()) %>%
filter (median_trip_distance > 0.06, num_trips > 10)
taxi_starts_distance <- st_as_sf(taxi_2015_grouped_distance,
coords=c("Pickup.Centroid.Longitude","Pickup.Centroid.Latitude"),
agr="constant", crs=4326)
ggplot(data = taxi_starts_distance) +
geom_sf(data = areas, colour = "white", fill = "black") +
geom_sf(aes(size = num_trips, colour = median_trip_distance), show.legend = 'point') +
theme_bw() +
my_theme_maps +
theme(panel.grid.major=element_line(colour="transparent")) +
scale_colour_gradient(low = "#E8E013", high = "#FF1570",
name = "Median Trip \nDistance\n(In Miles)", labels = comma) +
annotate("segment", x = -87.91, xend = -87.92, y = 41.97, yend = 41.93,
color="orange", linetype="twodash") +
annotate("text", x = -87.91, y = 41.92, label = "O'Hare \nAirport",
colour = "orange", face = "bold", size = 3, face = "bold", family="Courier" ) +
annotate("segment", x = -87.765, xend = -87.83, y = 41.785, yend = 41.82,
color="orange", linetype="twodash") +
annotate("text", x = -87.83, y = 41.83, label = "Midway \nAirport",
colour = "orange", face = "bold", size = 3, face = "bold", family="Courier" ) +
annotate("segment", x = -87.615, xend = -87.58, y = 41.895, yend = 41.93,
color="orange", linetype="twodash") +
annotate("text", x = -87.57, y = 41.94, label = "Downtown \nChicago",
colour = "orange", face = "bold", size = 3, face = "bold", family="Courier" ) +
#theme(legend.position = c(0.13,0.24)) +
scale_size_continuous(name = 'Number of Rides \n(From Location)', labels = comma) +
labs (subtitle ="Downtown has Shortest Rides",
title = "Excluding Airports, Highest Number of Long Distance Taxi Rides Are From Hyde Park. ",
x = "",
y = "",
caption = "Chicago Taxi Trips\n2015 Data")
Following from the above map, we see a similar trend with trip distance. An interesting point to note is that people in areas far off from downtown have longer trip distances, indicating they are travelling farther away, whereas trips in Downtown/North-Side are generally shorter, indicating movement near to downtown. This could potentially serve as an indicator of concentration of employment & other resources within a narrow geographical area.